/*LIS Cross-National Data Center in Luxembourg*/

/*email: usersupport@lisdatacenter.org*/

/*LIS Self Teaching Package 2022*/
/*Part II: Gender, employment, and wages*/
/*SAS version*/

/*last change of this version of the syntax: 15-01-2022*/


/*Exercise 7: Wage regressions*/

/*
 * country
 *
 * Runs the Exercise 7 wage regressions. For every dataset prefix listed in
 * the global macro variable ALL (for example us04) the macro:
 *   1. copies the household file (macro variable <prefix>h) and the person
 *      file (macro variable <prefix>p) into WORK and sorts both by hid;
 *   2. merges them by hid, keeps persons with 25 <= age <= 54 and
 *      relation <= 2200, and derives the regressors (homeowner, youngchild,
 *      oldchild, depemp, agesq, mededuc, hieduc);
 *   3. builds the log hourly wage and recodes hourly wages lying more than
 *      3 x IQR outside the ppopwgt-weighted quartiles of the log wage;
 *   4. stacks the result into WORK.current.
 * After the loop it fits logwage with PROC SURVEYREG, BY dname and sex.
 *
 * Reads  : macro variable ALL, the <prefix>h / <prefix>p macro variables,
 *          and the format chcat (must exist before the macro is invoked).
 * Writes : WORK.<prefix>h, WORK.<prefix>p, WORK.<prefix>, WORK.temp,
 *          WORK.current, plus the regression output.
 */
%MACRO country ;

	/* Keep all working macro variables out of the global symbol table. */
	%LOCAL i ccyy nfiles b t ;

	/* Number of prefixes in ALL; stays 0 when ALL is undefined or blank. */
	%LET nfiles = 0 ;
	%IF %SYMEXIST(all) %THEN
		%DO ;
			%IF %LENGTH(&all) > 0 %THEN
				%LET nfiles = %SYSFUNC(COUNTW(&all, %STR( ))) ;
		%END ;

	%IF &nfiles = 0 %THEN
		%DO ;
			%PUT ERROR: macro variable ALL must list at least one dataset prefix such as us04. ;
			%RETURN ;
		%END ;

	%DO i = 1 %TO &nfiles ;
		%LET ccyy = %SCAN(&all, &i, %STR( )) ;

		/* Household file: only the id and the own code are needed. */
		DATA &ccyy.h ;
			SET &&&ccyy.h (KEEP=hid own) ;
		RUN ;
		PROC SORT DATA=&ccyy.h ;
			BY hid ;
		RUN ;

		/* Person file. */
		DATA &ccyy.p ;
			SET &&&ccyy.p (KEEP=hid did dname pwgt ppopwgt relation partner ageyoch age sex immigr educ educ_c emp status1 ptime1 hwage1) ;
		RUN ;
		PROC SORT DATA=&ccyy.p ;
			BY hid ;
		RUN ;

		/* Merge, select the sample and derive the regressors. */
		DATA &ccyy ;
			MERGE &ccyy.h &ccyy.p ;
			BY hid ;

			/* Subsetting IF. A missing relation also passes, because SAS
			   orders missing values below every number. */
			IF ((25 <= age <= 54) AND (relation LE 2200)) ;

			/* homeowner stays missing for own codes outside 100-299. */
			IF (100 <= own <= 199) THEN homeowner = 1 ;
			IF (200 <= own <= 299) THEN homeowner = 0 ;

			/* A missing or out-of-range ageyoch falls into category 0. */
			IF      (0 <= ageyoch < 6)  THEN achildcat = 1 ;
			ELSE IF (6 <= ageyoch < 18) THEN achildcat = 2 ;
			ELSE                             achildcat = 0 ;

			IF      (achildcat = 1)       THEN youngchild = 1 ;
			ELSE IF (achildcat in (0,2))  THEN youngchild = 0 ;
			ELSE                               youngchild = . ;

			IF      (achildcat = 2)       THEN oldchild = 1 ;
			ELSE IF (achildcat in (0,1))  THEN oldchild = 0 ;
			ELSE                               oldchild = . ;

			IF      (100 <= status1 <= 120) THEN depemp = 1 ;
			ELSE IF (200 <= status1 <= 240) THEN depemp = 0 ;
			ELSE                                 depemp = . ;

			agesq = age*age ;

			/* Remove any format attached to educ_c. */
			FORMAT educ_c ;

			IF      (educ in (1,3)) THEN mededuc = 0 ;
			ELSE IF (educ = 2)      THEN mededuc = 1 ;
			ELSE                         mededuc = . ;

			IF      (educ in (1,2)) THEN hieduc = 0 ;
			ELSE IF (educ = 3)      THEN hieduc = 1 ;
			ELSE                         hieduc = . ;

			/* Hourly wage: drop missing, floor negatives at 0. */
			hourwage = hwage1 ;
			IF hourwage = . THEN DELETE ;
			IF (hourwage < 0) THEN hourwage = 0 ;

			/* The log of a zero wage is set to 0 by convention; testing first
			   avoids the invalid-argument notes that LOG(0) would raise. */
			IF (hourwage > 0) THEN hourwagelog = LOG(hourwage) ;
			ELSE                   hourwagelog = 0 ;
		RUN ;

		PROC SORT DATA=&ccyy ;
			BY did hourwagelog ;
		RUN ;

		/* Weighted quartiles of the log wage for this dataset. */
		PROC UNIVARIATE DATA=&ccyy NOPRINT ;
			VAR hourwagelog ;
			WEIGHT ppopwgt ;
			OUTPUT OUT=temp P25=q25 P75=q75 ;
		RUN ;

		/* SYMPUTX converts the numbers explicitly, without the BEST12.
		   truncation and conversion notes of CALL SYMPUT. */
		DATA _NULL_ ;
			SET temp ;
			CALL SYMPUTX("b", q25, "L") ;
			CALL SYMPUTX("t", q75, "L") ;
		RUN ;

		/* Recode wages outside [q25 - 3*IQR, q75 + 3*IQR] (log scale) to the
		   bound. Parentheses keep a negative quartile from fusing with the
		   preceding operator. */
		DATA &ccyy ;
			SET &ccyy ;
			iqr = (&t) - (&b) ;
			upper_bound = (&t) + (iqr * 3) ;
			lower_bound = (&b) - (iqr * 3) ;
			IF hourwage > exp(upper_bound) THEN hourwage = exp(upper_bound) ;
			IF hourwage < exp(lower_bound) THEN hourwage = exp(lower_bound) ;
			logwage = LOG(hourwage) ;
		RUN ;

		/* First dataset creates the stacked file, later ones are appended. */
		%IF &i = 1 %THEN
			%DO ;
				DATA current ;
					SET &ccyy ;
					ATTRIB achildcat FORMAT=chcat. ;
				RUN ;
			%END ;
		%ELSE
			%DO ;
				PROC APPEND BASE=current DATA=&ccyy FORCE ;
				RUN ;
			%END ;
	%END ;

	PROC SORT DATA=current ;
		BY dname sex ;
	RUN ;

	/* Name the input explicitly instead of relying on _LAST_. */
	PROC SURVEYREG DATA=current ;
		BY dname sex ;
		WEIGHT ppopwgt ;
		MODEL logwage = age agesq mededuc hieduc immigr partner youngchild oldchild
		                ptime1 homeowner ;
	RUN ;

%MEND country ;

/* Display labels for achildcat, which the country macro attaches to the
   stacked dataset through ATTRIB ... FORMAT=chcat. */
PROC FORMAT ;
	VALUE chcat
		. = 'missing'
		0 = 'no children <18'
		1 = '<6 years'
		2 = '6-17 years' ;
RUN ;

/* Space-separated dataset prefixes. The country macro reads this list through
   the macro variable all and appends h / p to each prefix to pick up the
   household and person files. */
%LET all = us04 be04 gr04 ;
/* Run the wage regressions defined by the country macro. */
%country
 
 